import requests
from lxml import etree
import json
import xlrd
import xlwt


# Browser-like request headers. The User-Agent is built from adjacent string
# literals: a backslash continuation *inside* a string literal would embed the
# next line's leading indentation into the header value.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    ),
}
url = "http://www.qiushibaike.com/8hr/page/1/"

# Always pass a timeout: without one, requests can block forever on a stalled
# connection. Fail loudly on HTTP errors instead of parsing an error page.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
content = response.text

# Parse the page and collect one <li> node per post (ids contain "qiushi_tag_").
html = etree.HTML(content)
# NOTE(review): etree.HTML may yield None for an empty document on some lxml
# versions; fall back to an empty list rather than crash on `None.xpath`.
node_list = (
    html.xpath('//li[contains(@id, "qiushi_tag_")]') if html is not None else []
)

def _first_match(node, path):
    """Return the first result of an XPath query on *node*, or None.

    Element results are reduced to their ``.text``; string results (such as
    attribute queries like ``@src``) are returned as they are. None is
    returned when the query matches nothing, so a post that lacks one field
    does not abort the whole export.
    """
    matches = node.xpath(path)
    if not matches:
        return None
    first = matches[0]
    return first if isinstance(first, str) else first.text


# XPath expressions for the exported columns, in column order:
# user name, image link, post text, like count.
_COLUMN_PATHS = (
    './/span[contains(@class, "recmd-name")]',
    './a/img/@src',
    './/a[@class="recmd-content"]',
    './/div[@class="recmd-num"]/span[1]',
)

wk = xlwt.Workbook()
sheet1 = wk.add_sheet("数据", cell_overwrite_ok=True)

# One spreadsheet row per post node; a missing field is written as None.
for row, node in enumerate(node_list):
    for col, path in enumerate(_COLUMN_PATHS):
        sheet1.write(row, col, _first_match(node, path))

# Save once, after all rows are written. Saving inside the loop rewrote the
# whole file for every row and produced no file at all when there were no rows.
# NOTE(review): xlwt emits the legacy binary .xls format, so Excel may refuse
# to open a file with an .xlsx extension. The path is left unchanged here to
# avoid breaking anything that expects it — consider renaming to "duanzi.xls".
wk.save("./duanzi.xlsx")